take home exercise 2 Singapore public bus commuter flows

Author

Xu Lin

#overview We currently have data on people’s movement patterns as well as information on schools, businesses, and retail activities in different areas. The goal of this task is to identify common Saturday activities among the population. Through our analysis, we aim to provide recommendations to the government, suggesting potential enhancements to existing facilities or proposing the development of new amenities that align with people’s preferences. The objective is to make these facilities more appealing and strategically located for the community’s convenience.

#Objective Our goal is to pinpoint popular weekend destinations, analyze the main facilities in those areas, and provide recommendations accordingly.

#Data Geospatial data: Passenger Volume by Origin Destination Bus Stops, Bus Stop Location, Train Station and Train Station Exit Point, Master Plan 2019 Subzone Boundary, HDB Property Information, Business, Entertn, F&B, FinServ, Leisure&Recreation and Retails. Aspatial data: HDB Property Information. This data is for us to use.

#Import the data

pacman::p_load(tmap, sf, DT, stplanr,
               performance,
               ggpubr, tidyverse, xml, mapview)
Warning: package 'xml' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Warning: Perhaps you meant 'XML' ?
Warning: 'BiocManager' not available.  Could not check Bioconductor.

Please use `install.packages('BiocManager')` and then retry.
Warning in p_install(package, character.only = TRUE, ...):
Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
logical.return = TRUE, : there is no package called 'xml'
Warning in pacman::p_load(tmap, sf, DT, stplanr, performance, ggpubr, tidyverse, : Failed to install/load:
xml

#Importing the OD data

odbus <- read_csv("data/aspatial/origin_destination_bus_202310.csv")
Rows: 5694297 Columns: 7
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (5): YEAR_MONTH, DAY_TYPE, PT_TYPE, ORIGIN_PT_CODE, DESTINATION_PT_CODE
dbl (2): TIME_PER_HOUR, TOTAL_TRIPS

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
odbus$ORIGIN_PT_CODE <- as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <- as.factor(odbus$DESTINATION_PT_CODE) 
glimpse(odbus)
Rows: 5,694,297
Columns: 7
$ YEAR_MONTH          <chr> "2023-10", "2023-10", "2023-10", "2023-10", "2023-…
$ DAY_TYPE            <chr> "WEEKENDS/HOLIDAY", "WEEKDAY", "WEEKENDS/HOLIDAY",…
$ TIME_PER_HOUR       <dbl> 16, 16, 14, 14, 17, 17, 17, 7, 14, 14, 10, 20, 20,…
$ PT_TYPE             <chr> "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "BUS", "…
$ ORIGIN_PT_CODE      <fct> 04168, 04168, 80119, 80119, 44069, 20281, 20281, 1…
$ DESTINATION_PT_CODE <fct> 10051, 10051, 90079, 90079, 17229, 20141, 20141, 1…
$ TOTAL_TRIPS         <dbl> 3, 5, 3, 5, 4, 1, 24, 2, 1, 7, 3, 2, 5, 1, 1, 1, 1…
weekendmorning11_14 <- odbus %>%
  filter(DAY_TYPE == "WEEKENDS/HOLIDAY") %>%
  filter(TIME_PER_HOUR >= 11 & TIME_PER_HOUR <= 14) %>%
  group_by(ORIGIN_PT_CODE, DESTINATION_PT_CODE) %>%
  summarise(TRIPS = sum(TOTAL_TRIPS), .groups = 'keep')
datatable(weekendmorning11_14)
Warning in instance$preRenderHook(instance): It seems your data is too big for
client-side DataTables. You may consider server-side processing:
https://rstudio.github.io/DT/server.html
write_rds(weekendmorning11_14, "data/rds/weekendmorning11_14.rds")
weekendmorning11_14 <- read_rds("data/rds/weekendmorning11_14.rds")

#Working with Geospatial Data

busstop <- st_read(dsn = "data/geospatial", layer = "BusStop") %>%
  st_transform(crs = 3414)
Reading layer `BusStop' from data source 
  `/Users/linxu/ISSS624/take home exercise 2/data/geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 5161 features and 3 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
glimpse(busstop)
Rows: 5,161
Columns: 4
$ BUS_STOP_N <chr> "22069", "32071", "44331", "96081", "11561", "66191", "2338…
$ BUS_ROOF_N <chr> "B06", "B23", "B01", "B05", "B05", "B03", "B02A", "B02", "B…
$ LOC_DESC   <chr> "OPP CEVA LOGISTICS", "AFT TRACK 13", "BLK 239", "GRACE IND…
$ geometry   <POINT [m]> POINT (13576.31 32883.65), POINT (13228.59 44206.38),…
busstop_points = busstop %>%
  st_as_sf(coords = c("geometry"), crs = 3414, remove = FALSE)
mapview_busstop_points = mapview(busstop_points, cex = 0.5, alpha = .5, popup = NULL)
mapview_busstop_points
area_honeycomb_grid = st_make_grid(busstop_points, c(375, 375), what = "polygons", square = FALSE)
honeycomb_grid_sf = st_sf(area_honeycomb_grid) %>%
  mutate(grid_id = 1:length(lengths(area_honeycomb_grid)))
honeycomb_grid_sf$n_colli = lengths(st_intersects(honeycomb_grid_sf, busstop_points))
honeycomb_count = filter(honeycomb_grid_sf, n_colli > 0)
tmap_mode("view")
tmap mode set to interactive viewing
map_honeycomb = tm_shape(honeycomb_count) +
  tm_fill(
    col = "n_colli",
    palette = "Reds",
    style = "cont",
    title = "Number of collisions",
    id = "grid_id",
    showNA = FALSE,
    alpha = 0.6,
    popup.vars = c(
      "Number of collisions: " = "n_colli"
    ),
    popup.format = list(
      n_colli = list(format = "f", digits = 0)
    )
  ) +
  tm_borders(col = "grey40", lwd = 0.7)
map_honeycomb
busstop_honeycomb_count <- st_intersection(busstop, honeycomb_count) %>%
  select(BUS_STOP_N, grid_id) %>%
  st_drop_geometry()
Warning: attribute variables are assumed to be spatially constant throughout
all geometries
glimpse(busstop_honeycomb_count)
Rows: 5,162
Columns: 2
$ BUS_STOP_N <chr> "25059", "25059", "25751", "26379", "26369", "25761", "2638…
$ grid_id    <int> 3, 86, 170, 173, 174, 211, 214, 255, 255, 258, 295, 296, 29…
write_rds(busstop_honeycomb_count, "data/rds/busstop_honeycomb_count.rds")  
od_data <- left_join(weekendmorning11_14 , busstop_honeycomb_count,
            by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
  rename(ORIGIN_BS = ORIGIN_PT_CODE,
         ORIGIN_SZ = grid_id,
         DESTIN_BS = DESTINATION_PT_CODE)
Warning in left_join(weekendmorning11_14, busstop_honeycomb_count, by = c(ORIGIN_PT_CODE = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 27736 of `x` matches multiple rows in `y`.
ℹ Row 3165 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
duplicate <- od_data %>%
  group_by_all() %>%
  filter(n()>1) %>%
  ungroup()
od_data <- unique(od_data)
od_data <- left_join(od_data , busstop_honeycomb_count,
            by = c("DESTIN_BS" = "BUS_STOP_N")) 
Warning in left_join(od_data, busstop_honeycomb_count, by = c(DESTIN_BS = "BUS_STOP_N")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 206 of `x` matches multiple rows in `y`.
ℹ Row 3203 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
glimpse(od_data)
Rows: 223,522
Columns: 5
Groups: ORIGIN_BS, DESTIN_BS [222,154]
$ ORIGIN_BS <chr> "01012", "01012", "01012", "01012", "01012", "01012", "01012…
$ DESTIN_BS <chr> "01112", "01113", "01121", "01211", "01311", "01549", "01559…
$ TRIPS     <dbl> 265, 189, 120, 141, 218, 1, 7, 16, 8, 8, 58, 21, 7, 3, 6, 1,…
$ ORIGIN_SZ <int> 5859, 5859, 5859, 5859, 5859, 5859, 5859, 5859, 5859, 5859, …
$ grid_id   <int> 5901, 5943, 5985, 6068, 6110, 5984, 5943, 6110, 6111, 6153, …